From 065102282fddfd49c43e94d9940be3cd2cc08859 Mon Sep 17 00:00:00 2001 From: oliskoli Date: Wed, 4 Jul 2007 19:05:03 +0000 Subject: [PATCH] Fill cp1250..cp1257 extra table ("Best Fit" non-ascii codes). --- cet/cp1250.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++---- cet/cp1251.h | 46 +++++++++++++++++++++++++++---- cet/cp1252.h | 66 ++++++++++++++++++++++++++++++++++++++++---- cet/cp1253.h | 35 ++++++++++++++++++++---- cet/cp1254.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++---- cet/cp1255.h | 44 ++++++++++++++++++++++++++---- cet/cp1256.h | 34 +++++++++++++++++++---- cet/cp1257.h | 19 +++++++++---- 8 files changed, 348 insertions(+), 48 deletions(-) diff --git a/cet/cp1250.h b/cet/cp1250.h index c16ba4d01..4c11b598d 100644 --- a/cet/cp1250.h +++ b/cet/cp1250.h @@ -135,10 +135,73 @@ const cet_ucs4_link_t cet_ucs4_to_cp1250_links[cet_ucs4_to_cp1250_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1250_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1250_extra[cet_ucs4_to_cp1250_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1250.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1250_extra[] = +{ + {0x0189, 0xd0} /* latin capital letter african d */, + {0x02bb, 0x91} /* modifier letter turned comma */, + {0x02c9, 0xaf} /* modifier letter macron */, + {0x02ca, 0xb4} /* modifier letter acute accent */, + {0x02da, 0xb0} /* ring above */, + {0x0301, 0xb4} /* combining acute accent */, + {0x0304, 0xaf} /* combining macron */, + {0x0305, 0xaf} /* combining overline */, + {0x0306, 0xa2} /* combining breve */, + {0x0307, 0xff} /* combining dot above */, + {0x0308, 0xa8} /* combining diaeresis */, + {0x030a, 0xb0} /* combining ring above */, + {0x030c, 0xa1} /* combining caron */, + {0x0327, 0xb8} /* combining cedilla */, + {0x03b2, 0xdf} /* greek small letter beta */, + {0x03bc, 0xb5} /* greek small letter mu */, + {0x2024, 0x95} /* one dot leader */, + 
{0x2033, 0x94} /* double prime */, + {0x2070, 0xb0} /* superscript zero */, + {0x20a4, 0xa3} /* lira sign */, + {0x212b, 0xc5} /* angstrom sign */, + {0x2190, 0x8b} /* leftwards arrow */, + {0x2192, 0x9b} /* rightwards arrow */, + {0x2193, 0xa1} /* downwards arrow */, + {0x2205, 0xd8} /* empty set */, + {0x2213, 0xb1} /* minus-or-plus sign */, + {0x2218, 0xb0} /* ring operator */, + {0x2219, 0x95} /* bullet operator */, + {0x226a, 0xab} /* much less-than */, + {0x226b, 0xbb} /* much greater-than */, + {0x22c5, 0xb7} /* dot operator */, + {0x2302, 0xa6} /* house */, + {0x2500, 0xa6} /* box drawings light horizontal */, + {0x2510, 0xac} /* box drawings light down and left */, + {0x2551, 0xa6} /* box drawings double vertical */, + {0x2557, 0xac} /* box drawings double down and left */, + {0x2560, 0xa6} /* box drawings double vertical and right */, + {0x2563, 0xa6} /* box drawings double vertical and left */, + {0x2569, 0xa6} /* box drawings double up and horizontal */, + {0x25a0, 0xa6} /* black square */, + {0x25bc, 0xa1} /* black down-pointing triangle */, + {0x25d8, 0x95} /* inverse bullet */, + {0x263a, 0xa2} /* white smiling face */, + {0x263b, 0xa2} /* black smiling face */, + {0x2660, 0xa6} /* black spade suit */, + {0x2663, 0xa6} /* black club suit */, + {0x2665, 0xa6} /* black heart suit */, + {0x2666, 0xa6} /* black diamond suit */, + {0x275b, 0x91} /* heavy single turned comma quotation mark ornament */, + {0x275c, 0x92} /* heavy single comma quotation mark ornament */, + {0x275d, 0x93} /* heavy double turned comma quotation mark ornament */, + {0x275e, 0x94} /* heavy double comma quotation mark ornament */, + {0x300a, 0xab} /* left double angle bracket */, + {0x300b, 0xbb} /* right double angle bracket */, + {0x301d, 0x93} /* reversed double prime quotation mark */, + {0x301e, 0x94} /* double prime quotation mark */, + {0x301f, 0x84} /* low double prime quotation mark */, + {0x30fb, 0xb7} /* katakana middle dot */, + {0x30fc, 0x97} /* katakana-hiragana 
prolonged sound mark */ +}; + +#define cet_ucs4_to_cp1250_extra_ct (sizeof(cet_ucs4_to_cp1250_extra) / sizeof(cet_ucs4_to_cp1250_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1250 = /* defined in cet.h */ { @@ -155,8 +218,8 @@ cet_cs_vec_t cet_cs_vec_cp1250 = /* defined in cet.h */ cet_ucs4_to_cp1250_links, /* UCS-4 to char links */ cet_ucs4_to_cp1250_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1250_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1250_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1251.h b/cet/cp1251.h index efa2195c3..4fd0349ac 100644 --- a/cet/cp1251.h +++ b/cet/cp1251.h @@ -173,10 +173,44 @@ const cet_ucs4_link_t cet_ucs4_to_cp1251_links[cet_ucs4_to_cp1251_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1251_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1251_extra[cet_ucs4_to_cp1251_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1251.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1251_extra[] = +{ + {0x2195, 0xa6} /* up down arrow */, + {0x21a8, 0xa6} /* up down arrow with base */, + {0x2219, 0x95} /* bullet operator */, + {0x2302, 0xa6} /* house */, + {0x2502, 0xa6} /* box drawings light vertical */, + {0x2510, 0xac} /* box drawings light down and left */, + {0x2551, 0xa6} /* box drawings double vertical */, + {0x2553, 0xe3} /* box drawings down double and right single */, + {0x2554, 0xe3} /* box drawings double down and right */, + {0x2555, 0xac} /* box drawings down single and left double */, + {0x2556, 0xac} /* box drawings down double and left single */, + {0x2557, 0xac} /* box drawings double down and left */, + {0x255e, 0xa6} /* box drawings vertical single and right double */, + {0x255f, 0xa6} /* box drawings vertical double and right single */, + {0x2560, 0xa6} /* box drawings double vertical and right */, + {0x2561, 0xa6} /* 
box drawings vertical single and left double */, + {0x2562, 0xa6} /* box drawings vertical double and left single */, + {0x2563, 0xa6} /* box drawings double vertical and left */, + {0x2567, 0xa6} /* box drawings up single and horizontal double */, + {0x2568, 0xa6} /* box drawings up double and horizontal single */, + {0x2569, 0xa6} /* box drawings double up and horizontal */, + {0x258c, 0xa6} /* left half block */, + {0x2590, 0xa6} /* right half block */, + {0x25a0, 0xa6} /* black square */, + {0x25bc, 0xa1} /* black down-pointing triangle */, + {0x25d8, 0x95} /* inverse bullet */, + {0x2660, 0xa6} /* black spade suit */, + {0x2663, 0xa6} /* black club suit */, + {0x2665, 0xa6} /* black heart suit */, + {0x2666, 0xa6} /* black diamond suit */ +}; + +#define cet_ucs4_to_cp1251_extra_ct (sizeof(cet_ucs4_to_cp1251_extra) / sizeof(cet_ucs4_to_cp1251_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1251 = /* defined in cet.h */ { @@ -193,8 +227,8 @@ cet_cs_vec_t cet_cs_vec_cp1251 = /* defined in cet.h */ cet_ucs4_to_cp1251_links, /* UCS-4 to char links */ cet_ucs4_to_cp1251_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1251_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1251_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1252.h b/cet/cp1252.h index 01f202325..f270bdb05 100644 --- a/cet/cp1252.h +++ b/cet/cp1252.h @@ -88,10 +88,64 @@ const cet_ucs4_link_t cet_ucs4_to_cp1252_links[cet_ucs4_to_cp1252_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1252_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1252_extra[cet_ucs4_to_cp1252_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1252.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1252_extra[] = +{ + {0x0110, 0xd0} /* latin capital letter d with stroke */, + {0x0189, 0xd0} /* latin capital letter african d */, + {0x0191, 
0x83} /* latin capital letter f with hook */, + {0x02c9, 0xaf} /* modifier letter macron */, + {0x02ca, 0xb4} /* modifier letter acute accent */, + {0x02da, 0xb0} /* ring above */, + {0x0301, 0xb4} /* combining acute accent */, + {0x0304, 0xaf} /* combining macron */, + {0x0305, 0xaf} /* combining overline */, + {0x0308, 0xa8} /* combining diaeresis */, + {0x030a, 0xb0} /* combining ring above */, + {0x0327, 0xb8} /* combining cedilla */, + {0x03b2, 0xdf} /* greek small letter beta */, + {0x03bc, 0xb5} /* greek small letter mu */, + {0x2024, 0xb7} /* one dot leader */, + {0x2070, 0xb0} /* superscript zero */, + {0x20a1, 0xa2} /* colon sign */, + {0x20a4, 0xa3} /* lira sign */, + {0x212b, 0xc5} /* angstrom sign */, + {0x2205, 0xd8} /* empty set */, + {0x2213, 0xb1} /* minus-or-plus sign */, + {0x2218, 0xb0} /* ring operator */, + {0x2219, 0xb7} /* bullet operator */, + {0x2248, 0x98} /* almost equal to */, + {0x226a, 0xab} /* much less-than */, + {0x226b, 0xbb} /* much greater-than */, + {0x22c5, 0xb7} /* dot operator */, + {0x2302, 0xa6} /* house */, + {0x2310, 0xac} /* reversed not sign */, + {0x2502, 0xa6} /* box drawings light vertical */, + {0x2524, 0xa6} /* box drawings light vertical and left */, + {0x2551, 0xa6} /* box drawings double vertical */, + {0x255e, 0xa6} /* box drawings vertical single and right double */, + {0x255f, 0xa6} /* box drawings vertical double and right single */, + {0x2560, 0xa6} /* box drawings double vertical and right */, + {0x2561, 0xa6} /* box drawings vertical single and left double */, + {0x2562, 0xa6} /* box drawings vertical double and left single */, + {0x2563, 0xa6} /* box drawings double vertical and left */, + {0x2580, 0xaf} /* upper half block */, + {0x2588, 0xa6} /* full block */, + {0x258c, 0xa6} /* left half block */, + {0x2590, 0xa6} /* right half block */, + {0x2591, 0xa6} /* light shade */, + {0x2592, 0xa6} /* medium shade */, + {0x2593, 0xa6} /* dark shade */, + {0x25a0, 0xa6} /* black square */, + {0x263c, 0xa4} /* 
white sun with rays */, + {0x300a, 0xab} /* left double angle bracket */, + {0x300b, 0xbb} /* right double angle bracket */, + {0x30fb, 0xb7} /* katakana middle dot */ +}; + +#define cet_ucs4_to_cp1252_extra_ct (sizeof(cet_ucs4_to_cp1252_extra) / sizeof(cet_ucs4_to_cp1252_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1252 = /* defined in cet.h */ { @@ -108,8 +162,8 @@ cet_cs_vec_t cet_cs_vec_cp1252 = /* defined in cet.h */ cet_ucs4_to_cp1252_links, /* UCS-4 to char links */ cet_ucs4_to_cp1252_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1252_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1252_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1253.h b/cet/cp1253.h index 727139847..7f4a1b7f9 100644 --- a/cet/cp1253.h +++ b/cet/cp1253.h @@ -151,10 +151,33 @@ const cet_ucs4_link_t cet_ucs4_to_cp1253_links[cet_ucs4_to_cp1253_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1253_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1253_extra[cet_ucs4_to_cp1253_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1253.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1253_extra[] = +{ + {0x0191, 0x83} /* latin capital letter f with hook */, + {0x030d, 0xb4} /* combining vertical line above */, + {0x2195, 0xa6} /* up down arrow */, + {0x21a8, 0xa6} /* up down arrow with base */, + {0x2302, 0xa6} /* house */, + {0x2502, 0xa6} /* box drawings light vertical */, + {0x2510, 0xac} /* box drawings light down and left */, + {0x2551, 0xa6} /* box drawings double vertical */, + {0x2557, 0xac} /* box drawings double down and left */, + {0x2560, 0xa6} /* box drawings double vertical and right */, + {0x2563, 0xa6} /* box drawings double vertical and left */, + {0x2569, 0xa6} /* box drawings double up and horizontal */, + {0x25a0, 0xa6} /* black square */, + {0x25bc, 0xa1} /* black down-pointing 
triangle */, + {0x25d8, 0x95} /* inverse bullet */, + {0x2660, 0xa6} /* black spade suit */, + {0x2663, 0xa6} /* black club suit */, + {0x2665, 0xa6} /* black heart suit */, + {0x2666, 0xa6} /* black diamond suit */ +}; + +#define cet_ucs4_to_cp1253_extra_ct (sizeof(cet_ucs4_to_cp1253_extra) / sizeof(cet_ucs4_to_cp1253_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1253 = /* defined in cet.h */ { @@ -171,8 +194,8 @@ cet_cs_vec_t cet_cs_vec_cp1253 = /* defined in cet.h */ cet_ucs4_to_cp1253_links, /* UCS-4 to char links */ cet_ucs4_to_cp1253_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1253_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1253_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1254.h b/cet/cp1254.h index 55b75b6db..085320898 100644 --- a/cet/cp1254.h +++ b/cet/cp1254.h @@ -95,10 +95,75 @@ const cet_ucs4_link_t cet_ucs4_to_cp1254_links[cet_ucs4_to_cp1254_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1254_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1254_extra[cet_ucs4_to_cp1254_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1254.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1254_extra[] = +{ + {0x0191, 0x83} /* latin capital letter f with hook */, + {0x02bb, 0x91} /* modifier letter turned comma */, + {0x02c9, 0xaf} /* modifier letter macron */, + {0x02ca, 0xb4} /* modifier letter acute accent */, + {0x02da, 0xb0} /* ring above */, + {0x02db, 0xb8} /* ogonek */, + {0x02dd, 0xa8} /* double acute accent */, + {0x0301, 0xb4} /* combining acute accent */, + {0x0303, 0x98} /* combining tilde */, + {0x0304, 0xaf} /* combining macron */, + {0x0305, 0xaf} /* combining overline */, + {0x0306, 0x88} /* combining breve */, + {0x0307, 0xb7} /* combining dot above */, + {0x0308, 0xa8} /* combining diaeresis */, + {0x030a, 0xa7} /* combining ring above */, + 
{0x030c, 0x88} /* combining caron */, + {0x030e, 0xa8} /* combining double vertical line above */, + {0x0327, 0xb8} /* combining cedilla */, + {0x03b2, 0xdf} /* greek small letter beta */, + {0x03bc, 0xb5} /* greek small letter mu */, + {0x2024, 0x95} /* one dot leader */, + {0x2033, 0xa8} /* double prime */, + {0x2070, 0xb0} /* superscript zero */, + {0x2080, 0xb0} /* subscript zero */, + {0x2082, 0xb2} /* subscript two */, + {0x2083, 0xb3} /* subscript three */, + {0x20a4, 0xa3} /* lira sign */, + {0x212b, 0xc5} /* angstrom sign */, + {0x2190, 0x8b} /* leftwards arrow */, + {0x2192, 0x9b} /* rightwards arrow */, + {0x2205, 0xd8} /* empty set */, + {0x2213, 0xb1} /* minus-or-plus sign */, + {0x2218, 0xb0} /* ring operator */, + {0x2219, 0x95} /* bullet operator */, + {0x226a, 0xab} /* much less-than */, + {0x226b, 0xbb} /* much greater-than */, + {0x22c5, 0xb7} /* dot operator */, + {0x2302, 0xa6} /* house */, + {0x2500, 0xa6} /* box drawings light horizontal */, + {0x2510, 0xac} /* box drawings light down and left */, + {0x2551, 0xa6} /* box drawings double vertical */, + {0x2557, 0xac} /* box drawings double down and left */, + {0x2560, 0xa6} /* box drawings double vertical and right */, + {0x2563, 0xa6} /* box drawings double vertical and left */, + {0x2569, 0xa6} /* box drawings double up and horizontal */, + {0x25a0, 0xa6} /* black square */, + {0x25bc, 0xa1} /* black down-pointing triangle */, + {0x25d8, 0x95} /* inverse bullet */, + {0x2660, 0xa6} /* black spade suit */, + {0x2663, 0xa6} /* black club suit */, + {0x2665, 0xa6} /* black heart suit */, + {0x2666, 0xa6} /* black diamond suit */, + {0x275b, 0x91} /* heavy single turned comma quotation mark ornament */, + {0x275c, 0x92} /* heavy single comma quotation mark ornament */, + {0x275d, 0x93} /* heavy double turned comma quotation mark ornament */, + {0x275e, 0x94} /* heavy double comma quotation mark ornament */, + {0x300a, 0xab} /* left double angle bracket */, + {0x300b, 0xbb} /* right double angle 
bracket */, + {0x301f, 0x84} /* low double prime quotation mark */, + {0x30fb, 0xb7} /* katakana middle dot */, + {0x30fc, 0x97} /* katakana-hiragana prolonged sound mark */ +}; + +#define cet_ucs4_to_cp1254_extra_ct (sizeof(cet_ucs4_to_cp1254_extra) / sizeof(cet_ucs4_to_cp1254_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1254 = /* defined in cet.h */ { @@ -115,8 +180,8 @@ cet_cs_vec_t cet_cs_vec_cp1254 = /* defined in cet.h */ cet_ucs4_to_cp1254_links, /* UCS-4 to char links */ cet_ucs4_to_cp1254_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1254_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1254_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1255.h b/cet/cp1255.h index d80498da3..c078cefa3 100644 --- a/cet/cp1255.h +++ b/cet/cp1255.h @@ -112,10 +112,42 @@ const cet_ucs4_link_t cet_ucs4_to_cp1255_links[cet_ucs4_to_cp1255_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1255_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1255_extra[cet_ucs4_to_cp1255_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1255.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1255_extra[] = +{ + {0x02c6, 0x88} /* modifier letter circumflex */, + {0x02dc, 0x98} /* spacing tilde */, + {0x05b0, 0xc0} /* hebrew point sheva */, + {0x05b1, 0xc1} /* hebrew point hataf segol */, + {0x05b2, 0xc2} /* hebrew point hataf patah */, + {0x05b3, 0xc3} /* hebrew point hataf qamats */, + {0x05b4, 0xc4} /* hebrew point hiriq */, + {0x05b5, 0xc5} /* hebrew point tsere */, + {0x05b6, 0xc6} /* hebrew point segol */, + {0x05b7, 0xc7} /* hebrew point patah */, + {0x05b8, 0xc8} /* hebrew point qamats */, + {0x05b9, 0xc9} /* hebrew point holam */, + {0x05ba, 0xca} /* hebrew point holam haser for vav 
*/, + {0x05bb, 0xcb} /* hebrew point qubuts */, + {0x05bc, 0xcc} /* hebrew point dagesh */, + {0x05bd, 0xcd} /* hebrew point meteg */, + {0x05be, 0xce} /* hebrew punctuation maqaf */, + {0x05bf, 0xcf} /* hebrew point rafe */, + {0x05c0, 0xd0} /* hebrew point paseq */, + {0x05c1, 0xd1} /* hebrew point shin dot */, + {0x05c2, 0xd2} /* hebrew point sin dot */, + {0x05c3, 0xd3} /* hebrew punctuation sof pasuq */, + {0x05f0, 0xd4} /* hebrew ligature yiddish double vav */, + {0x05f1, 0xd5} /* hebrew ligature yiddish vav yod */, + {0x05f2, 0xd6} /* hebrew ligature yiddish double yod */, + {0x05f3, 0xd7} /* hebrew punctuation geresh */, + {0x05f4, 0xd8} /* hebrew punctuation gershayim */, + {0x20aa, 0xa4} /* new sheqel sign */ +}; + +#define cet_ucs4_to_cp1255_extra_ct (sizeof(cet_ucs4_to_cp1255_extra) / sizeof(cet_ucs4_to_cp1255_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1255 = /* defined in cet.h */ { @@ -132,8 +164,8 @@ cet_cs_vec_t cet_cs_vec_cp1255 = /* defined in cet.h */ cet_ucs4_to_cp1255_links, /* UCS-4 to char links */ cet_ucs4_to_cp1255_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1255_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1255_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1256.h b/cet/cp1256.h index b4d8941c5..89bc69652 100644 --- a/cet/cp1256.h +++ b/cet/cp1256.h @@ -142,10 +142,32 @@ const cet_ucs4_link_t cet_ucs4_to_cp1256_links[cet_ucs4_to_cp1256_ct] = {0x3113, 0xd0} /* letter zh */ }; -/* -#define cet_ucs4_to_cp1256_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1256_extra[cet_ucs4_to_cp1256_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1256.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1256_extra[] = +{ + {0x0152, 0x8c} /* latin capital letter o e */, + {0x0153, 0x9c} /* latin small letter o e */, + {0x0192, 0x83} /* latin small letter script f */, + 
{0x02c6, 0x88} /* modifier letter circumflex */, + {0x060c, 0xa1} /* arabic comma */, + {0x0638, 0xd9} /* arabic letter dhah */, + {0x0679, 0x8a} /* arabic letter tteh */, + {0x0688, 0x8f} /* arabic letter ddal */, + {0x0691, 0x9a} /* arabic letter rreh */, + {0x06a9, 0x98} /* arabic letter keheh */, + {0x06ba, 0x9f} /* arabic letter noon ghunna */, + {0x06be, 0xaa} /* arabic letter heh doachashmee */, + {0x06c1, 0xc0} /* arabic letter heh goal */, + {0x06cc, 0xed} /* best-fit : farsi yeh -> arabic yeh (u+064a) */, + {0x06d2, 0xff} /* arabic letter yeh barree */, + {0x200c, 0x9d} /* zero width non-joiner */, + {0x200d, 0x9e} /* zero width joiner */, + {0x2030, 0x89} /* per mille sign */ +}; + +#define cet_ucs4_to_cp1256_extra_ct (sizeof(cet_ucs4_to_cp1256_extra) / sizeof(cet_ucs4_to_cp1256_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1256 = /* defined in cet.h */ { @@ -162,8 +184,8 @@ cet_cs_vec_t cet_cs_vec_cp1256 = /* defined in cet.h */ cet_ucs4_to_cp1256_links, /* UCS-4 to char links */ cet_ucs4_to_cp1256_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1256_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1256_extra_ct, /* number of extra links */ NULL /* for internal use */ }; diff --git a/cet/cp1257.h b/cet/cp1257.h index 3e9355fd1..a629396cc 100644 --- a/cet/cp1257.h +++ b/cet/cp1257.h @@ -130,10 +130,17 @@ const cet_ucs4_link_t cet_ucs4_to_cp1257_links[cet_ucs4_to_cp1257_ct] = {0x2122, 0x99} /* mark sign */ }; -/* -#define cet_ucs4_to_cp1257_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_cp1257_extra[cet_ucs4_to_cp1257_extra_ct] = {}; -*/ +/* Extra table was generated from bestfit1257.txt located at + ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ + +const cet_ucs4_link_t cet_ucs4_to_cp1257_extra[] = +{ + {0x02c7, 0x8e} /* hacek */, + {0x02d9, 0xff} /* dot above */, + {0x02db, 0x9e} /* ogonek */, +}; + +#define cet_ucs4_to_cp1257_extra_ct (sizeof(cet_ucs4_to_cp1257_extra) / 
sizeof(cet_ucs4_to_cp1257_extra[0])) /* entry count of the extra table */ cet_cs_vec_t cet_cs_vec_cp1257 = /* defined in cet.h */ { @@ -150,8 +157,8 @@ cet_cs_vec_t cet_cs_vec_cp1257 = /* defined in cet.h */ cet_ucs4_to_cp1257_links, /* UCS-4 to char links */ cet_ucs4_to_cp1257_ct, /* number of links */ - NULL, /* hand made UCS-4 links */ - 0, /* number of extra links */ + cet_ucs4_to_cp1257_extra, /* hand made UCS-4 links */ + cet_ucs4_to_cp1257_extra_ct, /* number of extra links */ NULL /* for internal use */ }; -- 2.30.2